In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn import metrics
import plotly.express as px
from warnings import filterwarnings 
filterwarnings("ignore")
D:\anaconda files\lib\site-packages\scipy\__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.25.0 is required for this version of SciPy (detected version 1.26.4
  warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"
In [2]:
data=pd.read_csv("C:\\Users\\laxma\\Downloads\\road_accident_data_by_vehicle_type.csv")
data
Out[2]:
Location Motor Car Dual Purpose Vehicle Lorry Cycle Motor Cycle/Moped Three wheeler Articulated Vehicle, prime mover SLT Bus Private Bus Intercity Bus Land Vehicle/Tractor Animal drawn vehicle or rider on animal Other
0 Ampara 59 102 121 93 575 169 4 15 35 1 41 4 0
1 Anuradhapura 116 224 246 158 845 196 2 28 104 3 72 0 6
2 Badulla (Badulla & Bandarawela Divisions) 82 121 171 20 266 219 2 58 78 1 16 0 5
3 Batticaloa 26 43 30 64 224 52 4 9 25 1 24 1 2
4 Chilaw (Chilaw & Puttalam Divisions) 102 213 224 134 530 163 14 23 84 1 19 1 3
5 Colombo (Colombo-South, North, Central, Mt. La... 4708 2675 1686 221 2835 2804 110 190 1194 4 59 0 63
6 Galle (Galle & Elpitiya Divisions) 488 377 348 216 1346 586 16 73 224 3 16 0 9
7 Gampaha (Gampaha, Negombo & Kelaniya Divisions) 2018 1688 1352 317 2598 1466 175 123 590 9 38 4 6
8 Jaffna (Jaffna & KKS Divisions) 32 64 46 73 213 63 1 8 21 0 28 0 0
9 Kalutara (Kalutara & Panadura Divisions) 381 398 337 226 1022 525 15 53 238 4 15 0 6
10 Kandy (Kandy & Gampola Divisions) 735 637 526 29 746 895 4 148 403 4 10 1 18
11 Kegalle (Kegalle & Seethawaka Divisions) 281 323 357 23 500 545 11 82 224 1 15 0 12
12 Kilinochchi (Kilinochchi & Mankulam Divisions) 5 74 93 30 120 15 3 21 42 0 10 0 1
13 Kurunegala (Kurunegala, Kuliyapitiya & Nikawer... 333 428 522 193 1349 433 16 97 218 4 35 0 17
14 Mannar 12 35 26 18 71 35 0 1 8 0 7 0 0
15 Matale 118 192 246 68 384 306 8 50 110 3 15 0 7
16 Matara 138 200 163 107 593 297 0 26 131 2 13 0 1
17 Monaragala 64 104 162 58 389 197 1 21 47 0 27 0 2
18 Mulathivu 1 6 2 9 5 0 0 0 3 0 1 0 0
19 Nuwara-Eliya (Nuwara-Eliya & Hatton Divisions) 80 136 130 12 89 247 4 32 83 1 6 0 14
20 Polonnaruwa 36 92 136 63 348 95 1 18 31 1 36 0 0
21 Ratnapura 182 234 321 42 553 443 9 38 155 2 8 0 0
22 Tangalle (Tangalle Division) 59 236 148 66 480 110 11 34 70 0 36 0 2
23 Trincomalee (Trincomalee & Kantale Divisions) 23 72 78 63 286 129 2 12 30 0 27 3 4
24 Vavuniya 4 15 32 26 66 20 0 3 3 0 2 0 0
In [3]:
data.head()
Out[3]:
Location Motor Car Dual Purpose Vehicle Lorry Cycle Motor Cycle/Moped Three wheeler Articulated Vehicle, prime mover SLT Bus Private Bus Intercity Bus Land Vehicle/Tractor Animal drawn vehicle or rider on animal Other
0 Ampara 59 102 121 93 575 169 4 15 35 1 41 4 0
1 Anuradhapura 116 224 246 158 845 196 2 28 104 3 72 0 6
2 Badulla (Badulla & Bandarawela Divisions) 82 121 171 20 266 219 2 58 78 1 16 0 5
3 Batticaloa 26 43 30 64 224 52 4 9 25 1 24 1 2
4 Chilaw (Chilaw & Puttalam Divisions) 102 213 224 134 530 163 14 23 84 1 19 1 3
In [4]:
data.tail()
Out[4]:
Location Motor Car Dual Purpose Vehicle Lorry Cycle Motor Cycle/Moped Three wheeler Articulated Vehicle, prime mover SLT Bus Private Bus Intercity Bus Land Vehicle/Tractor Animal drawn vehicle or rider on animal Other
20 Polonnaruwa 36 92 136 63 348 95 1 18 31 1 36 0 0
21 Ratnapura 182 234 321 42 553 443 9 38 155 2 8 0 0
22 Tangalle (Tangalle Division) 59 236 148 66 480 110 11 34 70 0 36 0 2
23 Trincomalee (Trincomalee & Kantale Divisions) 23 72 78 63 286 129 2 12 30 0 27 3 4
24 Vavuniya 4 15 32 26 66 20 0 3 3 0 2 0 0
In [5]:
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 14 columns):
 #   Column                                   Non-Null Count  Dtype 
---  ------                                   --------------  ----- 
 0   Location                                 25 non-null     object
 1   Motor Car                                25 non-null     int64 
 2   Dual Purpose Vehicle                     25 non-null     int64 
 3   Lorry                                    25 non-null     int64 
 4   Cycle                                    25 non-null     int64 
 5   Motor Cycle/Moped                        25 non-null     int64 
 6   Three wheeler                            25 non-null     int64 
 7   Articulated Vehicle, prime mover         25 non-null     int64 
 8   SLT Bus                                  25 non-null     int64 
 9   Private Bus                              25 non-null     int64 
 10  Intercity Bus                            25 non-null     int64 
 11  Land Vehicle/Tractor                     25 non-null     int64 
 12  Animal drawn vehicle or rider on animal  25 non-null     int64 
 13  Other                                    25 non-null     int64 
dtypes: int64(13), object(1)
memory usage: 2.9+ KB
In [6]:
data.describe()
Out[6]:
Motor Car Dual Purpose Vehicle Lorry Cycle Motor Cycle/Moped Three wheeler Articulated Vehicle, prime mover SLT Bus Private Bus Intercity Bus Land Vehicle/Tractor Animal drawn vehicle or rider on animal Other
count 25.000000 25.000000 25.000000 25.000000 25.000000 25.000000 25.000000 25.000000 25.000000 25.000000 25.000000 25.000000 25.000000
mean 403.320000 347.560000 300.120000 93.160000 657.320000 400.400000 16.520000 46.520000 166.040000 1.800000 23.040000 0.560000 7.120000
std 987.465979 589.583474 396.740818 83.097673 718.236714 597.698224 39.428332 48.523293 254.023536 2.101587 17.350504 1.227464 12.839782
min 1.000000 6.000000 2.000000 9.000000 5.000000 0.000000 0.000000 0.000000 3.000000 0.000000 1.000000 0.000000 0.000000
25% 32.000000 74.000000 93.000000 29.000000 224.000000 95.000000 1.000000 15.000000 31.000000 0.000000 10.000000 0.000000 0.000000
50% 82.000000 192.000000 163.000000 64.000000 480.000000 197.000000 4.000000 28.000000 83.000000 1.000000 16.000000 0.000000 3.000000
75% 281.000000 323.000000 337.000000 134.000000 746.000000 443.000000 11.000000 58.000000 218.000000 3.000000 35.000000 0.000000 7.000000
max 4708.000000 2675.000000 1686.000000 317.000000 2835.000000 2804.000000 175.000000 190.000000 1194.000000 9.000000 72.000000 4.000000 63.000000
In [7]:
data.isnull().sum()
Out[7]:
Location                                   0
Motor Car                                  0
Dual Purpose Vehicle                       0
Lorry                                      0
Cycle                                      0
Motor Cycle/Moped                          0
Three wheeler                              0
Articulated Vehicle, prime mover           0
SLT Bus                                    0
Private Bus                                0
Intercity Bus                              0
Land Vehicle/Tractor                       0
Animal drawn vehicle or rider on animal    0
Other                                      0
dtype: int64
In [8]:
data.duplicated().sum()
Out[8]:
0
In [9]:
data.shape
Out[9]:
(25, 14)
In [10]:
#VISUALIZATION
In [11]:
plt.scatter(data['Cycle'],data['SLT Bus'])
plt.xticks(rotation=90)
plt.show()
In [12]:
fig=px.bar(data,x='Lorry',y='Location',color='Lorry')
fig.show()
In [13]:
fig=px.violin(data,x='Intercity Bus',y='SLT Bus',color='Intercity Bus')
fig.show()
In [14]:
fig=px.bar(data,x='Other',y='Land Vehicle/Tractor',color='Land Vehicle/Tractor')
fig.show()
In [15]:
plt.scatter(data['Dual Purpose Vehicle'],data['Cycle'])
plt.bar(data['Motor Car'],data['SLT Bus'],color='red')
plt.xticks(rotation=90)
plt.show()
In [16]:
plt.figure(figsize=(10,4))
sns.countplot(x='Animal drawn vehicle or rider on animal', data=data, color='cyan')
plt.title('Animal drawn vehicle v/s rider on animal')
plt.show()
In [17]:
plt.figure(figsize=(10,4))
top_car = data['Other'].value_counts().nlargest(10)
sns.countplot(y=data.Other,color='green')
Out[17]:
<AxesSubplot:xlabel='count', ylabel='Other'>
In [18]:
sns.lineplot(x='Cycle', y='Articulated Vehicle, prime mover', data=data).set_title('Variation of Cycle with Articulated Vehicle, prime mover')
Out[18]:
Text(0.5, 1.0, 'Variation of Cycle with Articulated Vehicle, prime mover')
In [19]:
sns.barplot(data['Land Vehicle/Tractor'],data['Motor Cycle/Moped'],color='r')
plt.xticks(rotation=90)
plt.show()
In [20]:
plt.figure(figsize=(8, 4))
sns.scatterplot(data=data, x='Animal drawn vehicle or rider on animal', y='Location')
plt.title('Animal drawn vehicle or rider on animalin the Location')
plt.xlabel('Animal drawn vehicle or rider on animal')
plt.ylabel('Location')
plt.show()
In [21]:
sns.displot(data["Dual Purpose Vehicle"])
Out[21]:
<seaborn.axisgrid.FacetGrid at 0x1b0ce0c3fa0>
In [22]:
sns.relplot(x='Lorry',y='Private Bus',data=data)
Out[22]:
<seaborn.axisgrid.FacetGrid at 0x1b0ce17ceb0>
In [23]:
sns.countplot(x='Land Vehicle/Tractor',data=data)
plt.xticks(rotation=90)
Out[23]:
(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18]),
 [Text(0, 0, '1'),
  Text(1, 0, '2'),
  Text(2, 0, '6'),
  Text(3, 0, '7'),
  Text(4, 0, '8'),
  Text(5, 0, '10'),
  Text(6, 0, '13'),
  Text(7, 0, '15'),
  Text(8, 0, '16'),
  Text(9, 0, '19'),
  Text(10, 0, '24'),
  Text(11, 0, '27'),
  Text(12, 0, '28'),
  Text(13, 0, '35'),
  Text(14, 0, '36'),
  Text(15, 0, '38'),
  Text(16, 0, '41'),
  Text(17, 0, '59'),
  Text(18, 0, '72')])
In [24]:
sns.boxplot(x='Land Vehicle/Tractor',y='Cycle',data=data)
Out[24]:
<AxesSubplot:xlabel='Land Vehicle/Tractor', ylabel='Cycle'>
In [25]:
sns.violinplot(x='Intercity Bus',y='Animal drawn vehicle or rider on animal',data=data)
Out[25]:
<AxesSubplot:xlabel='Intercity Bus', ylabel='Animal drawn vehicle or rider on animal'>
In [26]:
#MODEL BUILDING
In [27]:
data['months']=2024-data['Other']
data.drop('Other',axis=1,inplace=True)
In [28]:
data.head()
Out[28]:
Location Motor Car Dual Purpose Vehicle Lorry Cycle Motor Cycle/Moped Three wheeler Articulated Vehicle, prime mover SLT Bus Private Bus Intercity Bus Land Vehicle/Tractor Animal drawn vehicle or rider on animal months
0 Ampara 59 102 121 93 575 169 4 15 35 1 41 4 2024
1 Anuradhapura 116 224 246 158 845 196 2 28 104 3 72 0 2018
2 Badulla (Badulla & Bandarawela Divisions) 82 121 171 20 266 219 2 58 78 1 16 0 2019
3 Batticaloa 26 43 30 64 224 52 4 9 25 1 24 1 2022
4 Chilaw (Chilaw & Puttalam Divisions) 102 213 224 134 530 163 14 23 84 1 19 1 2021
In [29]:
data.rename(columns={'Motor Car':'car', 'Dual Purpose Vehicle':'bike', 'Lorry':'6 wheeler', 'Cycle':'bycycle'},inplace=True)
In [30]:
data.columns
Out[30]:
Index(['Location', 'car', 'bike', '6 wheeler', 'bycycle', 'Motor Cycle/Moped',
       'Three wheeler', 'Articulated Vehicle, prime mover', 'SLT Bus',
       'Private Bus', 'Intercity Bus', 'Land Vehicle/Tractor',
       'Animal drawn vehicle or rider on animal', 'months'],
      dtype='object')
In [31]:
cat_cols=['6 wheeler','Intercity Bus','SLT Bus','Three wheeler']
i=0
while i<4:
    fig=plt.figure(figsize=[10,6])
    
    plt.subplot(1,2,1)
    sns.countplot(x=cat_cols[i],data=data)
    plt.xticks(rotation=90)
    i += 1
    
    plt.subplot(1,2,2)
    sns.countplot(x=cat_cols[i],data=data)
    
    i += 1
    
    plt.xticks(rotation=90)
    plt.show()
In [32]:
num_cols=['Motor Cycle/Moped','bike','Land Vehicle/Tractor','Private Bus']
i=0
while i<4:
    fig=plt.figure(figsize=[13,3])
    plt.subplot(1,2,1)
    sns.violinplot(x=num_cols[i],data=data)
    
    i += 1
    
    plt.subplot(1,2,2)
    sns.violinplot(x=num_cols[i],data=data)
    
    i += 1
    
    plt.show()
In [33]:
data.drop(labels = 'Location', axis = 1, inplace = True)
In [34]:
data = pd.get_dummies(data=data, drop_first = True)
In [35]:
sns.heatmap(data.corr(), annot=True, cmap="RdBu")
plt.show()
In [36]:
data.corr()['Motor Cycle/Moped']
Out[36]:
car                                        0.862189
bike                                       0.920462
6 wheeler                                  0.951255
bycycle                                    0.867273
Motor Cycle/Moped                          1.000000
Three wheeler                              0.895465
Articulated Vehicle, prime mover           0.868628
SLT Bus                                    0.821950
Private Bus                                0.893898
Intercity Bus                              0.832529
Land Vehicle/Tractor                       0.550981
Animal drawn vehicle or rider on animal    0.276366
months                                    -0.694734
Name: Motor Cycle/Moped, dtype: float64
In [37]:
x=data.drop('Motor Cycle/Moped', axis=1)
y=data['Animal drawn vehicle or rider on animal']
In [38]:
x.head()
Out[38]:
car bike 6 wheeler bycycle Three wheeler Articulated Vehicle, prime mover SLT Bus Private Bus Intercity Bus Land Vehicle/Tractor Animal drawn vehicle or rider on animal months
0 59 102 121 93 169 4 15 35 1 41 4 2024
1 116 224 246 158 196 2 28 104 3 72 0 2018
2 82 121 171 20 219 2 58 78 1 16 0 2019
3 26 43 30 64 52 4 9 25 1 24 1 2022
4 102 213 224 134 163 14 23 84 1 19 1 2021
In [39]:
y.head()
Out[39]:
0    4
1    0
2    0
3    1
4    1
Name: Animal drawn vehicle or rider on animal, dtype: int64
In [40]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

x_train,x_test,y_train,y_test=train_test_split(x, y, test_size=0.2,random_state=42)
print("x train: ",x_train.shape)
print("x test: ",x_test.shape)
print("y train: ",y_train.shape)
print("y test: ",y_test.shape)
x train:  (20, 12)
x test:  (5, 12)
y train:  (20,)
y test:  (5,)
In [41]:
model = LinearRegression()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
In [42]:
print("coefficients:",model.coef_)
print("intercept:", model.intercept_)
coefficients: [ 9.29927312e-19 -1.00516561e-17 -1.31644212e-18 -3.10400721e-18
 -3.01568310e-18  8.58355465e-17 -1.43662948e-17  2.61252411e-17
 -1.63154583e-17  1.63796059e-17  1.00000000e+00 -8.71101313e-17]
intercept: 1.7524870443708096e-13
In [43]:
from sklearn.metrics import mean_squared_error
mse=mean_squared_error(y_test,y_pred)
print("mean squared error:",mse)
mean squared error: 3.511108096713696e-30
In [44]:
error=y_test,-y_pred
print(error)
(8     0
16    0
0     4
23    3
11    0
Name: Animal drawn vehicle or rider on animal, dtype: int64, array([ 1.17472226e-15,  1.07027222e-15, -4.00000000e+00, -3.00000000e+00,
       -6.60423143e-16]))
In [45]:
import seaborn as sns
import matplotlib.pyplot as plt

error=y_test-y_pred
data=pd.DataFrame({'y_pred':y_pred, 'error': error})
sns.regplot(x='y_pred', y='error', data=data)
plt.show()
In [ ]: